{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "PB5MNwqhemiu",
   "metadata": {
    "id": "PB5MNwqhemiu"
   },
   "outputs": [],
   "source": [
    "%pip install sentence_transformers -q --user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "IiFfv1pCIzn3KaWkZ9whQHB3",
   "metadata": {
    "executionInfo": {
     "elapsed": 5233,
     "status": "ok",
     "timestamp": 1715337400431,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "IiFfv1pCIzn3KaWkZ9whQHB3",
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sentence_transformers import SentenceTransformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cMkBO9Zrh8Yp",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 2180,
     "status": "ok",
     "timestamp": 1715337403525,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "cMkBO9Zrh8Yp",
    "outputId": "1adb1afc-2d33-43f5-9e78-96be864e26f1"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
      "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
      "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
      "You will be able to reuse this secret in all of your notebooks.\n",
      "Please note that authentication is recommended but still optional to access public models or datasets.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "model = SentenceTransformer('paraphrase-MiniLM-L3-v2') # try more powerful model: all-mpnet-base-v2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "G9UX2FUneDWq",
   "metadata": {
    "executionInfo": {
     "elapsed": 144,
     "status": "ok",
     "timestamp": 1715337404534,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "G9UX2FUneDWq"
   },
   "outputs": [],
   "source": [
    "#Sentences we want to encode. Example:\n",
    "sentence = [\n",
    "    'This blanket has such a cozy temperature for me!',\n",
    "    'I am so much warmer and snug using this spread!',\n",
    "    'Taylor Swift was 34 years old in 2024.']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "1rD-yeh7eGl1",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 199,
     "status": "ok",
     "timestamp": 1715338234822,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "1rD-yeh7eGl1",
    "outputId": "dea6be9c-dad1-4a22-c2bd-93172dec2794"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.42288575  0.11801957  0.58989054 ... -0.07997329  0.16128035\n",
      "   0.15290555]\n",
      " [ 0.08642203 -0.01953124  0.39447883 ... -0.19359103  0.27264506\n",
      "   0.23386717]\n",
      " [ 0.02534104  0.47614297  0.11544248 ... -0.10391877  0.5816903\n",
      "  -0.7600804 ]]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(3, 384)"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Sentences are encoded by calling model.encode()\n",
    "embedding = model.encode(sentence)\n",
    "\n",
    "#Preview the embeddings\n",
    "print(embedding)\n",
    "embedding.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "wcfgqw4NjjMg",
   "metadata": {
    "executionInfo": {
     "elapsed": 215,
     "status": "ok",
     "timestamp": 1715338237387,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "wcfgqw4NjjMg"
   },
   "outputs": [],
   "source": [
    "# Euclidean Distance function\n",
    "def euclidean_distance(vec1, vec2):\n",
    "    return np.linalg.norm(vec1 - vec2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "8YRl-XeTjTqB",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 231,
     "status": "ok",
     "timestamp": 1715338238253,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "8YRl-XeTjTqB",
    "outputId": "91262f53-c98f-482b-d6b6-e949714afd24"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Euclidean Distance: Review 1 vs Review 2: 4.6202903\n",
      "Euclidean Distance: Review 1 vs Random Comment: 7.313547\n",
      "Euclidean Distance: Review 2 vs Random Comment: 6.3389034\n"
     ]
    }
   ],
   "source": [
    "# Euclidean Distance\n",
    "print(\"Euclidean Distance: Review 1 vs Review 2:\", euclidean_distance(embedding[0], embedding[1]))\n",
    "print(\"Euclidean Distance: Review 1 vs Random Comment:\", euclidean_distance(embedding[0], embedding[2]))\n",
    "print(\"Euclidean Distance: Review 2 vs Random Comment:\", euclidean_distance(embedding[1], embedding[2]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "X8GFCtAwePEb",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 203,
     "status": "ok",
     "timestamp": 1715338240507,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "X8GFCtAwePEb",
    "outputId": "ca1038a2-68fd-48a9-d429-462a1f5a29c4"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dot Product: Review 1 vs Review 2: 12.270497\n",
      "Dot Product: Review 1 vs Random Comment: -0.7654616\n",
      "Dot Product: Review 2 vs Random Comment: 0.95240986\n"
     ]
    }
   ],
   "source": [
    "# Dot Product\n",
    "print(\"Dot Product: Review 1 vs Review 2:\", np.dot(embedding[0], embedding[1]))\n",
    "print(\"Dot Product: Review 1 vs Random Comment:\", np.dot(embedding[0], embedding[2]))\n",
    "print(\"Dot Product: Review 2 vs Random Comment:\", np.dot(embedding[1], embedding[2]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "MlUlzG9afLjM",
   "metadata": {
    "executionInfo": {
     "elapsed": 147,
     "status": "ok",
     "timestamp": 1715338242502,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "MlUlzG9afLjM"
   },
   "outputs": [],
   "source": [
    "# Cosine Distance function\n",
    "def cosine_distance(vec1,vec2):\n",
    "  cosine = 1 - abs((np.dot(vec1, vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2))))\n",
    "  return cosine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "sikwgBLPeQU8",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 153,
     "status": "ok",
     "timestamp": 1715338243571,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "sikwgBLPeQU8",
    "outputId": "f52eb3ba-1302-430c-be41-bce1edad97b2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cosine Distance: Review 1 vs Review 2: 0.4523802399635315\n",
      "Cosine Distance: Review 1 vs Random Comment: 0.9704556427896023\n",
      "Cosine Distance: Review 2 vs Random Comment: 0.9542623348534107\n"
     ]
    }
   ],
   "source": [
    "# Cosine Distance\n",
    "print(\"Cosine Distance: Review 1 vs Review 2:\", cosine_distance(embedding[0], embedding[1]))\n",
    "print(\"Cosine Distance: Review 1 vs Random Comment:\", cosine_distance(embedding[0], embedding[2]))\n",
    "print(\"Cosine Distance: Review 2 vs Random Comment:\", cosine_distance(embedding[1], embedding[2]))\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "CHAPTER8-DISTANCEMETRICS.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
